// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

#include "hw_vi.h"
#include "messages.h"
#include "hw_emutime.h"
#include "stringCommons.h"

//Explicitly instantiates the three templated display-interrupt write handlers
//(ww_di, wh_di_high and wh_di_low) for a single interrupt number.
//NOTE(review): VIDEO_INTERRUPTS is defined outside this file; presumably it
//expands the given macro once per VI interrupt number - confirm.
#define INSTANTIATE_WDI(number) \
	template void HwVI::ww_di<number>(Hardware *h, WORD offset, DWORD data);\
	template void HwVI::wh_di_high<number>(Hardware *h, WORD offset, WORD data);\
	template void HwVI::wh_di_low<number>(Hardware *h, WORD offset, WORD data);
VIDEO_INTERRUPTS(INSTANTIATE_WDI);

//Halfword read handler for the register at offset 0x202C (the line counter
//maintained by doVCT()).
//doVCT() is always run first; if it reports a vsync while h->m_gp_activated
//is false, a VST_VCT_READ vsync is posted.
//Returns the stored register value with its two lowest bits cleared.
WORD HwVI::rh_vct(Hardware *h, WORD offset) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x202C);

	//doVCT() updates the register, so it has to run before the value is read back.
	const bool vsync_hit = doVCT(h);
	if(vsync_hit && !h->m_gp_activated) {
		h->postVsync(VST_VCT_READ);
	}

	const WORD masked = h->hrh(offset) & ~3;
	if(g::gp_log && g::verbose) {
		DEGUB("Hardware rh: 0x%04X, 0x%04X\n", offset, masked);
	}
	return masked;
}

//Halfword read handler for the register at offset 0x202E (horizontal counter).
//Each read derives a new value from the stored one as (stored % 640) + 4,
//writes it back and returns it. When the new value is <= 4 and
//h->m_gp_activated is false, a VST_HCT_READ vsync is posted.
//
//History notes kept from the original author:
// - Emulating this may not be necessary given the info in yagcd, but some
//   early demos seem to want it for their vsync.
// - Nothing fancy is needed here; eventually the other timing modes
//   (realtime/ereal) should be emulated as well.
WORD HwVI::rh_hct(Hardware *h, WORD offset) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x202E);

	const WORD stored = h->hrh(offset);
	const WORD hct = (stored % 640) + 4;

	const bool at_line_start = (hct <= 4);
	if(at_line_start && !h->m_gp_activated) {
		h->postVsync(VST_HCT_READ);
	}

	if(g::gp_log && g::verbose) {
		DEGUB("Hardware rh: 0x%04X, 0x%04X\n", offset, hct);
	}

	//Store the new counter so the next read continues from it.
	h->hwh(offset, hct);
	return hct;
}

//Word write handler for the two framebuffer address registers
//(offset 0x201C -> slot 0, offset 0x2024 -> slot 1).
//Stores the value, updates the cached XFB address through doXFB() and, for
//slot 0 only, posts a VST_FBA_WRITE vsync while h->m_gp_activated is false.
void HwVI::ww_xfb(Hardware *h, WORD offset, DWORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x201C || offset == 0x2024);

	const bool is_first_slot = (offset == 0x201C);

	h->hww(offset, data);
	GPDEGUB("Hardware ww: 0x%04X, 0x%08X\n", offset, data);
	doXFB(h, is_first_slot ? 0 : 1, data);

	if(is_first_slot && !h->m_gp_activated) {
		h->postVsync(VST_FBA_WRITE);
	}
}
//Halfword write handler for the upper half of a framebuffer address register
//(offset 0x201C -> slot 0, offset 0x2024 -> slot 1).
//After storing the halfword, the full word is read back and passed to doXFB().
void HwVI::wh_xfb_high(Hardware *h, WORD offset, WORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x201C || offset == 0x2024);

	GPDEGUB("Hardware wh: 0x%04X, 0x%04X\n", offset, data);
	h->hwh(offset, data);

	const int slot = (offset == 0x201C) ? 0 : 1;
	const DWORD whole_register = h->hrw(offset);
	doXFB(h, slot, whole_register);
}
//Halfword write handler for the lower half of a framebuffer address register
//(offset 0x201E -> slot 0, offset 0x2026 -> slot 1).
//After storing the halfword, the full word (starting two bytes earlier) is
//read back and passed to doXFB().
void HwVI::wh_xfb_low(Hardware *h, WORD offset, WORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x201E || offset == 0x2026);

	GPDEGUB("Hardware wh: 0x%04X, 0x%04X\n", offset, data);
	h->hwh(offset, data);

	const int slot = (offset == 0x201E) ? 0 : 1;
	const DWORD whole_register = h->hrw(offset - 2);
	doXFB(h, slot, whole_register);
}
//Decodes a framebuffer register value into an address and caches it in
//h->vi.xfb[slot].
// - bit 31 set: the value is used unchanged,
// - else bit 28 set: the low 24 bits, shifted left by 5,
// - otherwise: the low 24 bits.
//When the cached address actually changes, the change is logged and
//h->vi.xfb_changed_since_last_vsync is set.
void HwVI::doXFB(Hardware *h, int slot, DWORD data) {
	DWORD decoded = data & 0x00FFFFFF;
	if(data & 0x80000000) {
		decoded = data;
	} else if(data & 0x10000000) {
		decoded <<= 5;
	}

	if(h->vi.xfb[slot] == decoded)
		return;	//nothing changed

	GPDEGUB("XFB physical address %i changed to 0x%08X\n", slot+1, decoded);
	h->vi.xfb[slot] = decoded;
	h->vi.xfb_changed_since_last_vsync = true;
}

//Halfword write handler for the register at offset 0x2000.
//Only acts when the written value differs from the stored one: the change is
//logged, the register is updated and doACV() is called with the new value.
void HwVI::wh_vtr(Hardware *h, WORD offset, WORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x2000);
	if(data != h->hrh(offset)) {
		//The format string has a single conversion; it used to be fed `offset`
		//(always 0x2000 here) with `data` as a stray extra argument, so the new
		//value was never shown.
		DEGUB("VI ACV changed: 0x%04X\n", data);
		h->hwh(offset, data);
		doACV(h, data);
	}
}

//Halfword write handler for the register at offset 0x2002.
//Logs the write, stores the value, then lets doDCR() interpret it.
void HwVI::wh_dcr(Hardware *h, WORD offset, WORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x2002);

	DEGUB("Hardware wh: 0x%04X, 0x%04X\n", offset, data);

	h->hwh(offset, data);
	doDCR(h, data);
}

//Word write handler covering both halfword registers at offset 0x2000.
//The upper 16 bits go to doACV(), the lower 16 bits to doDCR().
void HwVI::ww_cr(Hardware *h, WORD offset, DWORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == 0x2000);
	DEGUB("Hardware ww: 0x%04X, 0x%08X\n", offset, data);

	const WORD upper_half = WORD(data >> 16);
	const WORD lower_half = WORD(data);

	//The register is stored before the handlers run; the order of
	//store vs. do* can be debated.
	h->hww(offset, data);
	doACV(h, upper_half);
	doDCR(h, lower_half);
}

//Returns the name of the current video mode, indexed by vi.mode.
//The table has four entries; vi.mode is expected to be in 0..3.
const char *Hardware::getGfxModeString() {
	static const char *const mode_names[4] = {
		"NTSC",
		"PAL50",
		"MPAL",
		"DEBUG"
	};
	return mode_names[vi.mode];
}

//Logs the two fields of a value written to the register at offset 0x2000:
//bits 13..4 (line count) and bits 3..0 (EQU). It has no other effect.
void HwVI::doACV(Hardware *, WORD data) {
	DEGUB(
		"Video ACV: %i (half?)lines. EQU: %i\n",
		getbitsr(data, 13, 4),
		getbitsr(data, 3, 0));
}

//Interprets a value written to the register at offset 0x2002.
// - bits 9..8 select the video mode and are stored in h->vi.mode,
// - bits 7..6 and 5..4 are the LE0/LE1 settings (logged only),
// - bit 3 is the 3D mode flag; it throws bouehr_exception when g::bouehr is set,
// - bit 2 is logged as "Non-Interlaced" when set,
// - bit 1 is logged as a video reset,
// - bit 0 is the enable flag; in TM_EXACT_FAST timing it triggers
//   h->vi_simple_interrupt().
void HwVI::doDCR(Hardware *h, WORD data) {
	static const char *const latch_names[4] = {
		"Off", "On for 1 field", "On for 2 fields", "Always on"
	};

	//The mode must be stored first: getGfxModeString() below reads it.
	h->vi.mode = getbitsr(data, 9, 8);

	DEGUB("Video Mode: %s. LE0: %s. LE1: %s. 3D mode: %s. %sInterlaced. %s.\n",
		h->getGfxModeString(),
		latch_names[getbitsr(data, 7, 6)],
		latch_names[getbitsr(data, 5, 4)],
		getbitr(data, 3) ? "yes" : "no",
		getbitr(data, 2) ? "Non-" : "",
		Abled(getbitr(data, 0)));

	if(getbitr(data, 1)) {
		DEGUB("Video Reset!\n");
	}

	if(g::bouehr && getbitr(data, 3)) {
		throw bouehr_exception("3D mode not supported!");
	}

	if(getbitr(data, 0) && g::timing_mode == g::TM_EXACT_FAST) {
		h->vi_simple_interrupt();
	}
}

//Word write handler for display interrupt register `number`.
//CLEARINTFLAGS is applied to the old and new values for the VI_DIINT flag
//before the new value is stored. If any bit covered by the mask 0x13FF03FF
//differs from the old value, the setting is logged and the interrupt event is
//rescheduled through set_viint().
template<WORD number> void HwVI::ww_di(Hardware *h, WORD offset, DWORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == VI_DI(number));

	DWORD previous = h->hrw(offset);
	CLEARINTFLAGS(previous, data, VI_DIINT);
	h->hww(offset, data);

	const bool settings_changed = MASKED_NEQUAL(previous, data, 0x13FF03FF);
	if(!settings_changed)
		return;

	GPDEGUB("Display interrupt %i %s, v = %i, h = %i\n",
		number, abled(getbitr(data, 28)), getbitsr(data, 25, 16),
		getbitsr(data, 9, 0));
	set_viint<number>(h);
}

//Halfword write handler for the upper half of display interrupt register
//`number`. Bit positions are those of the full word minus 16.
//CLEARINTFLAGS is applied for flag 0x8000 before the value is stored; if any
//bit covered by the mask 0x13FF changed, the setting is logged and the
//interrupt event is rescheduled through set_viint().
template<WORD number> void HwVI::wh_di_high(Hardware *h, WORD offset, WORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == VI_DI(number));

	WORD previous = h->hrh(offset);
	CLEARINTFLAGS(previous, data, 0x8000);
	h->hwh(offset, data);

	const bool settings_changed = MASKED_NEQUAL(previous, data, 0x13FF);
	if(!settings_changed)
		return;

	GPDEGUB("Display interrupt %i %s, v = %i\n",
		number, abled(getbitr(data, 28 - 16)),
		getbitsr(data, 25 - 16, 16 - 16));
	set_viint<number>(h);
}

//Halfword write handler for the lower half of display interrupt register
//`number` (at VI_DI(number) + 2).
//Stores the value; if any of bits 9..0 changed, the setting is logged and
//the interrupt event is rescheduled through set_viint().
template<WORD number> void HwVI::wh_di_low(Hardware *h, WORD offset, WORD data) {
	ADDITIVE_TIMING(&h->m_hwt[HWT_VI]);
	MYASSERT(offset == VI_DI(number) + 2);

	const WORD previous = h->hrh(offset);
	h->hwh(offset, data);

	const bool settings_changed = MASKED_NEQUAL(previous, data, 0x03FF);
	if(!settings_changed)
		return;

	GPDEGUB("Display interrupt %i, h = %i\n", number, getbitsr(data, 9, 0));
	set_viint<number>(h);
}

//Returns the recommended frame delay in milliseconds for the current video
//mode, i.e. 1000 divided by that mode's frames per second.
DWORD Hardware::getVDelay() {
	const DWORD delay_ms = 1000/FRAMES_PER_SECOND_MODED(vi.mode);
	return delay_ms;
}

//Updates the line counter register (0x202C) according to the active timing
//mode and returns true on vsync.
// - TM_REALTIME:   the line comes from the real-time clock; vsync is reported
//                  once the clock is past h->vi.next_vct_vsync, which is then
//                  moved to the start of the next raw frame.
// - TM_EXACT_FAST: the line simply advances by one per call and wraps to 1
//                  after exceeding LINES_PER_FRAME; the wrap is the vsync.
// - TM_EXACT_REAL: like TM_REALTIME, but driven by h->m_cc.cycles.
//Any other timing mode throws generic_fatal_exception.
bool HwVI::doVCT(Hardware *h) {
	if(g::timing_mode == g::TM_REALTIME) {
		ULI now;
		GET_RTC(&now);
		VIFRAME frame;
		MODED(RTC_2_VIFRAME)(now, &frame);
		MYASSERT(frame.rawframe < now);
		h->hwh(0x202C, frame.newline);

		const bool reached = now.isAfter(h->vi.next_vct_vsync);
		if(reached) {
			h->vi.next_vct_vsync = MODED(RAWFRAME_2_RTC)(frame.rawframe + 1);
		}
		return reached;
	}

	if(g::timing_mode == g::TM_EXACT_FAST) {
		const WORD current_line = h->hrh(0x202C);
		const bool wrapped = (current_line > LINES_PER_FRAME);
		h->hwh(0x202C, wrapped ? 1 : current_line + 1);
		return wrapped;
	}

	if(g::timing_mode == g::TM_EXACT_REAL) {
		VIFRAME frame;
		MODED(CYCLES_2_VIFRAME)(h->m_cc.cycles, &frame);
		h->hwh(0x202C, frame.newline);

		const bool reached = h->m_cc.cycles.isAfter(h->vi.next_vct_vsync);
		if(reached) {
			h->vi.next_vct_vsync = MODED(RAWFRAME_2_CYCLES)(frame.rawframe + 1);
			MYASSERT(h->vi.next_vct_vsync > h->m_cc.cycles);
		}
		return reached;
	}

	throw generic_fatal_exception("BFE!");
}

//(Re)schedules the timed event for display interrupt `number`.
//Does nothing in TM_EXACT_FAST timing. Otherwise any pending vi_event<number>
//is removed, and a new one is added only if the register has VI_DIENB set and
//VI_DIINT clear. The event time is "now" (real-time clock or emulated cycles,
//depending on the timing mode) plus ET_PER_FRAME plus the time for the
//configured line (bits 25..16). The horizontal position is not emulated yet.
template<WORD number> void HwVI::set_viint(Hardware *h) {
	if(g::timing_mode == g::TM_EXACT_FAST)
		return;

	const DWORD di = h->hrw(VI_DI(number));
	h->interrupt.remove_events(
		event_test<(event_callback)(hw_callback)vi_event<number> >());

	if(!getflag(di, VI_DIENB) || getflag(di, VI_DIINT))
		return;

	const DWORD target_line = getbitsr(di, 25, 16);
	ULI when;
	if(g::timing_mode == g::TM_REALTIME) {
		GET_RTC(&when);
		when += ET_PER_FRAME + MODED(LINES_2_RTC)(target_line);
	} else {	//TM_EXACT_REAL
		when = h->m_cc.cycles + ET_PER_FRAME + MODED(LINES_2_CYCLES)(target_line);
	}

	h->interrupt.add_event(vi_event<number>, when,
		STRING_PLUS_DIGIT("VI event ", number));
}

//Timed event callback for display interrupt `number`.
//Must not be reached in TM_EXACT_FAST timing (asserted, and ignored if it
//happens anyway). Raises the interrupt through raiseVI() once h->m_cc.cycles
//has reached h->m_visi_limit, then re-arms itself one ET_PER_FRAME after the
//current event time.
template<WORD number> void HwVI::vi_event(Hardware *h) {
	MYASSERT(g::timing_mode != g::TM_EXACT_FAST);
	if(g::timing_mode == g::TM_EXACT_FAST)
		return;

	if(h->m_cc.cycles >= h->m_visi_limit) {
		DWORD reg_value = h->hrw(VI_DI(number));
		raiseVI(h, reg_value, number);
	}

	h->interrupt.add_event(
		vi_event<number>,
		h->gekko_interrupt.event_time() + ET_PER_FRAME,
		STRING_PLUS_DIGIT("VI event ", number));
}

//Copies one 640x480 YUYV frame (640*480*2 bytes) from the framebuffer at
//vi.xfb[0] into screen_yuyv.
//Returns false when screen_yuyv is NULL, true after the copy.
//
//Only the first framebuffer address is used. Earlier experiments (raising a
//VI interrupt from here, interleaving lines from both framebuffers, catching
//memory exceptions) were abandoned; the interrupt approach was judged
//imprecise and not dual-compatible.
bool Hardware::getScreen(BYTE *screen_yuyv) {
	ADDITIVE_TIMING(&m_hwt[HWT_VI]);

	if(screen_yuyv == NULL) {
		return false;
	}

	//changed to fix gc-linux, then everything else:
	//addresses below MAIN_MEMORY_SIZE are fetched as physical addresses,
	//everything else goes through address translation.
	const bool use_physical = (vi.xfb[0] < MAIN_MEMORY_SIZE);
	const void *frame;
	if(use_physical)
		frame = m.getp_physical(vi.xfb[0], 640*480*2);
	else
		frame = m.getp_translated(vi.xfb[0], 640*480*2);

	//This copy takes a lot of time.
	memcpy(screen_yuyv, frame, 640*480*2);
	return true;
}

//YCbYCr -> RGB
inline void doline(DWORD src_y, DWORD dst_y, const BYTE *screen, BYTE *screen_rgb);

//Converts a full 640x480 YCbYCr (YUYV) frame to 24-bit RGB, line by line.
//screen_rgb:  destination, 640*480*3 bytes, must be writable.
//screen_yuyv: source, 640*480*2 bytes, must be readable.
//Throws generic_fatal_exception if either buffer fails its access check.
void convertYUYV2RGB(BYTE *screen_rgb, const BYTE *screen_yuyv) {
	//The destination is only ever written, so it is write access that has to
	//be verified; a read check would let a read-only buffer through.
	if(IsBadWritePtr(screen_rgb, 640*480*3))
		throw generic_fatal_exception("BadWritePtr error no.1");
	if(IsBadReadPtr(screen_yuyv, 640*480*2))
		throw generic_fatal_exception("BadReadPtr error no.2");

	for(DWORD y=0; y<480; y++)
		doline(y, y, screen_yuyv, screen_rgb);
}

//Converts one 640-pixel line from YCbYCr (YUYV) to 24 bits per pixel.
//src_y is the line index in `screen` (640*2 bytes per line), dst_y the line
//index in `screen_rgb` (640*3 bytes per line).
//Four pixels (8 source bytes) are turned into three DWORD stores (12 bytes)
//per iteration. Each source group of 4 bytes is Y, Cb, Y, Cr: two pixels
//sharing one chroma pair.
//NOTE: the conversion macros defined inside this function are also used by
//doXline() further down and capture the names `screen` and `i`.
inline void doline(DWORD src_y, DWORD dst_y, const BYTE *screen, BYTE *screen_rgb) {
	DWORD x, i, j;
	//x isn't used for anything other than loop counting
	for(x=0, i=src_y*640*2, j=dst_y*640*3; x<160; x++, i+=2*4, j+=3*4) {
		MYASSERT(i+7 < 640*480*2);
		MYASSERT(j+11 < 640*480*3);
		//Four pixels at a time, for optimized memory transfer
		/*R = Y                         + 1.40200 * (Cr - 0x80)
		*G = Y - 0.34414 * (Cb - 0x80) - 0.71414 * (Cr - 0x80)
		*B = Y + 1.77200 * (Cb - 0x80)*/

		//Big-endian; the YCbYCr DWORD is reversed. (?)
		//Interlaced? No.
		//First pixel pair (bytes i..i+3) and second pixel pair (bytes i+4..i+7).
#define Y1 int(screen[i])
#define Cb8 int(screen[i+1] - 0x80)
#define Y2 int(screen[i+2])
#define Cr8 int(screen[i+3] - 0x80)
#define Y1_2 int(screen[i+4])
#define Cb8_2 int(screen[i+5] - 0x80)
#define Y2_2 int(screen[i+6])
#define Cr8_2 int(screen[i+7] - 0x80)
		//Clamps to 0..0xFF. Fixed-point arithmetic with EXPONENT fractional bits.
#define ZBYTE(f) (((f) < 0) ? 0 : ((f) > 0xFF) ? 0xFF : BYTE(f))
#define EXPONENT 8
#define FACTOR (1 << EXPONENT)
#define REDBYTE(y, cb8, cr8) ZBYTE((FACTOR*y + int(1.40200f*FACTOR) * cr8) >> EXPONENT)
#define GREENBYTE(y, cb8, cr8) ZBYTE((FACTOR*y - int(0.34414f*FACTOR) * cb8 -\
	int(0.71414f*FACTOR) * cr8) >> EXPONENT)
#define BLUEBYTE(y, cb8, cr8) ZBYTE((FACTOR*y + int(1.77200f*FACTOR) * cb8) >> EXPONENT)

		//Component order across the three DWORDs, lowest byte first:
		// B1 G1 R1 B2 | G2 R2 B3 G3 | R3 B4 G4 R4
		//Pixels 1 and 2 use (Cb8, Cr8); pixels 3 and 4 use (Cb8_2, Cr8_2).
		MAKE(DWORD, screen_rgb[j+0]) = BLUEBYTE(Y1, Cb8, Cr8) |
			(GREENBYTE(Y1, Cb8, Cr8) << 8) | (REDBYTE(Y1, Cb8, Cr8) << 16) |
			(BLUEBYTE(Y2, Cb8, Cr8) << 24);
		MAKE(DWORD, screen_rgb[j+4]) = GREENBYTE(Y2, Cb8, Cr8) |
			(REDBYTE(Y2, Cb8, Cr8) << 8) | (BLUEBYTE(Y1_2, Cb8_2, Cr8_2) << 16) |
			(GREENBYTE(Y1_2, Cb8_2, Cr8_2) << 24);
		//The fourth pixel's blue must use the second chroma pair, like its
		//green and red; it previously took Cb from the first pair.
		MAKE(DWORD, screen_rgb[j+8]) = REDBYTE(Y1_2, Cb8_2, Cr8_2) |
			(BLUEBYTE(Y2_2, Cb8_2, Cr8_2) << 8) | (GREENBYTE(Y2_2, Cb8_2, Cr8_2) << 16) |
			(REDBYTE(Y2_2, Cb8_2, Cr8_2) << 24);
	}
}
//YCbYCr -> XRGB
inline void doXline(DWORD src_y, DWORD dst_y, const BYTE *screen, BYTE *screen_rgb);

//Converts a full 640x480 YCbYCr (YUYV) frame to 32 bits per pixel, one line
//at a time. Source and destination lines map one to one.
void convertYUYV2XRGB(BYTE *screen_xrgb, const BYTE *screen_yuyv) {
	DWORD line = 0;
	while(line < 480) {
		doXline(line, line, screen_yuyv, screen_xrgb);
		++line;
	}
}

//Converts one 640-pixel line from YCbYCr (YUYV) to 32 bits per pixel.
//src_y is the line index in `screen` (640*2 bytes per line), dst_y the line
//index in `screen_xrgb` (640*4 bytes per line).
//Each iteration consumes 4 source bytes (two pixels sharing one chroma pair)
//and stores two DWORDs with blue in bits 7..0, green in bits 15..8 and red in
//bits 23..16; the top byte is left zero.
//NOTE: the Y1/Y2/Cb8/Cr8 macros expand to screen[i+...], so the source
//pointer and source index must keep the names `screen` and `i`.
inline void doXline(DWORD src_y, DWORD dst_y, const BYTE *screen, BYTE *screen_xrgb) {
	DWORD i = src_y*640*2;
	DWORD out = dst_y*640*4;

	//Two pixels at a time, for optimized memory transfer
	for(DWORD pair = 0; pair < 320; ++pair) {
		MAKE(DWORD, screen_xrgb[out+0]) =
			BLUEBYTE(Y1, Cb8, Cr8) |
			(GREENBYTE(Y1, Cb8, Cr8) << 8) |
			(REDBYTE(Y1, Cb8, Cr8) << 16);
		MAKE(DWORD, screen_xrgb[out+4]) =
			BLUEBYTE(Y2, Cb8, Cr8) |
			(GREENBYTE(Y2, Cb8, Cr8) << 8) |
			(REDBYTE(Y2, Cb8, Cr8) << 16);

		i += 2*2;
		out += 4*2;
	}
}

/*void convertXRGB2RGB(BYTE *screen_rgb, const BYTE *screen_xrgb) {
DWORD x, i, j;
for(DWORD y=0; y<480; y++) {
for(x=0, i=y*640*3, j=y*640*4; x<640; x++, i+=3, j+=4) {
screen_rgb[i] = screen_xrgb[j];
screen_rgb[i + 1] = screen_xrgb[j + 1];
screen_rgb[i + 2] = screen_xrgb[j + 2];
}
}
}*/
